import pandas as pd
import numpy as np
import math
import seaborn as sns
#from numpy import array, hstack, math
#from numpy.random import uniform
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split,cross_val_score,RepeatedKFold
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor,GradientBoostingRegressor
from sklearn.metrics import mean_squared_error, r2_score, explained_variance_score, mean_absolute_error
from sklearn.feature_selection import RFE
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsRegressor
from sklearn import set_config
from sklearn.svm import LinearSVR
from sklearn.multioutput import MultiOutputRegressor,RegressorChain
import lightgbm as lgb
# Make numpy printouts easier to read.
np.set_printoptions(precision=3, suppress=True)
pd.set_option('display.max_columns', None)
pd.set_option('precision', 2)
#Read the csv file
df=pd.read_csv('C:/Users/ravip/Desktop/Florida_Orlando_Data.csv' , skiprows=9)
print(df.head(5))
timestamp Temp_2m RH_2m MSL_Pressure Precipitation_Total Snowfall \ 0 20000101T0000 18.45 73 1021.9 0.0 0 1 20000101T0100 18.02 75 1022.0 0.0 0 2 20000101T0200 17.55 77 1021.9 0.0 0 3 20000101T0300 17.03 80 1021.5 0.0 0 4 20000101T0400 16.57 82 1021.5 0.0 0 total_cld_lay high_cld_lay mid_cld_lay low_cld_lay Sunshine_Duration \ 0 18.0 0 0 18 0.0 1 20.0 0 0 20 0.0 2 21.0 0 0 21 0.0 3 15.0 0 0 15 0.0 4 25.0 0 0 25 0.0 Shortwave_Radiation Direct_Shortwave_Radiation \ 0 0.0 0.0 1 0.0 0.0 2 0.0 0.0 3 0.0 0.0 4 0.0 0.0 Diffuse_Shortwave_Radiation Evptrp FAO_Ref_Evptrp_2m CAPE Wind_Gust \ 0 0.0 0.0 2.77e-03 24 0.36 1 0.0 0.0 0.00e+00 18 1.08 2 0.0 0.0 0.00e+00 7 2.88 3 0.0 0.0 0.00e+00 0 4.68 4 0.0 0.0 0.00e+00 3 5.76 Wind_Speed_10m Wind_Dir_10m Wind_Speed_80m Wind_Dir_80m \ 0 3.60 143.13 6.29 166.76 1 3.10 144.46 6.83 161.57 2 3.05 135.00 7.09 156.04 3 3.10 125.54 7.10 149.53 4 3.40 122.01 7.59 148.57 Wind_Speed_900mb Wind_Dir_900mb Wind_Speed_850mb Wind_Dir_850mb \ 0 10.97 221.01 21.92 227.66 1 12.54 219.17 23.11 232.59 2 13.55 219.61 23.71 239.93 3 15.04 222.09 24.54 247.57 4 15.79 223.15 25.52 253.61 Wind_Speed_700_mb] Wind_Dir_700mb Wind_Speed_500mb Wind_Dir_500mb \ 0 31.36 257.40 62.01 254.51 1 30.19 261.77 61.67 254.42 2 29.22 266.47 62.08 255.56 3 28.44 270.73 62.48 258.37 4 28.59 275.78 62.78 262.75 GP_Height_1000mb GP_Height_850mb GP_Height_700mb GP_Height_500mb \ 0 185 1556 3144 5799 1 186 1557 3144 5799 2 185 1556 3143 5797 3 182 1552 3138 5791 4 182 1553 3138 5791 Temp_1000mb Temp_850mb Temp_700mb Temp Soil_Temp Soil_Moisture \ 0 19.15 8.42 4.54 14.17 18.16 0.13 1 18.88 8.56 4.35 13.30 16.96 0.13 2 18.67 8.73 4.12 12.64 15.94 0.13 3 18.52 8.88 3.82 12.12 15.08 0.13 4 18.45 9.04 3.54 11.73 14.37 0.13 Vapor_Pressure_Deficit_2m 0 5.74 1 5.18 2 4.62 3 3.89 4 3.40
#convert the timestamp object type variable into a date time variable to extract year, month, day and time.
df['timestamp'] = pd.to_datetime(df['timestamp'])
df['day'] = df['timestamp'].dt.day
df['year'] = df['timestamp'].dt.year
df['month'] = df['timestamp'].dt.month
def hr_func(ts):
return ts.hour
df['hour_of_day'] = df['timestamp'].apply(hr_func)
#drop the columns that have no data or which are irrelevant to the study
#snowfall feature does not have any data since Orlando is a sub tropical location which does not get any snowfall.
#time stamp feature has been extracted into multiple fields for the ease of analysis and hence this field will be dropped as it has string data that is no longer needed for processing.
data = df.drop(['timestamp', 'Snowfall'], axis=1)
print(data.head(5))
Temp_2m RH_2m MSL_Pressure Precipitation_Total total_cld_lay \ 0 18.45 73 1021.9 0.0 18.0 1 18.02 75 1022.0 0.0 20.0 2 17.55 77 1021.9 0.0 21.0 3 17.03 80 1021.5 0.0 15.0 4 16.57 82 1021.5 0.0 25.0 high_cld_lay mid_cld_lay low_cld_lay Sunshine_Duration \ 0 0 0 18 0.0 1 0 0 20 0.0 2 0 0 21 0.0 3 0 0 15 0.0 4 0 0 25 0.0 Shortwave_Radiation Direct_Shortwave_Radiation \ 0 0.0 0.0 1 0.0 0.0 2 0.0 0.0 3 0.0 0.0 4 0.0 0.0 Diffuse_Shortwave_Radiation Evptrp FAO_Ref_Evptrp_2m CAPE Wind_Gust \ 0 0.0 0.0 2.77e-03 24 0.36 1 0.0 0.0 0.00e+00 18 1.08 2 0.0 0.0 0.00e+00 7 2.88 3 0.0 0.0 0.00e+00 0 4.68 4 0.0 0.0 0.00e+00 3 5.76 Wind_Speed_10m Wind_Dir_10m Wind_Speed_80m Wind_Dir_80m \ 0 3.60 143.13 6.29 166.76 1 3.10 144.46 6.83 161.57 2 3.05 135.00 7.09 156.04 3 3.10 125.54 7.10 149.53 4 3.40 122.01 7.59 148.57 Wind_Speed_900mb Wind_Dir_900mb Wind_Speed_850mb Wind_Dir_850mb \ 0 10.97 221.01 21.92 227.66 1 12.54 219.17 23.11 232.59 2 13.55 219.61 23.71 239.93 3 15.04 222.09 24.54 247.57 4 15.79 223.15 25.52 253.61 Wind_Speed_700_mb] Wind_Dir_700mb Wind_Speed_500mb Wind_Dir_500mb \ 0 31.36 257.40 62.01 254.51 1 30.19 261.77 61.67 254.42 2 29.22 266.47 62.08 255.56 3 28.44 270.73 62.48 258.37 4 28.59 275.78 62.78 262.75 GP_Height_1000mb GP_Height_850mb GP_Height_700mb GP_Height_500mb \ 0 185 1556 3144 5799 1 186 1557 3144 5799 2 185 1556 3143 5797 3 182 1552 3138 5791 4 182 1553 3138 5791 Temp_1000mb Temp_850mb Temp_700mb Temp Soil_Temp Soil_Moisture \ 0 19.15 8.42 4.54 14.17 18.16 0.13 1 18.88 8.56 4.35 13.30 16.96 0.13 2 18.67 8.73 4.12 12.64 15.94 0.13 3 18.52 8.88 3.82 12.12 15.08 0.13 4 18.45 9.04 3.54 11.73 14.37 0.13 Vapor_Pressure_Deficit_2m day year month hour_of_day 0 5.74 1 2000 1 0 1 5.18 1 2000 1 1 2 4.62 1 2000 1 2 3 3.89 1 2000 1 3 4 3.40 1 2000 1 4
#check for the info regarding the data
data.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 182448 entries, 0 to 182447 Data columns (total 43 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 Temp_2m 182448 non-null float64 1 RH_2m 182448 non-null int64 2 MSL_Pressure 182448 non-null float64 3 Precipitation_Total 182448 non-null float64 4 total_cld_lay 182448 non-null float64 5 high_cld_lay 182448 non-null int64 6 mid_cld_lay 182448 non-null int64 7 low_cld_lay 182448 non-null int64 8 Sunshine_Duration 182448 non-null float64 9 Shortwave_Radiation 182448 non-null float64 10 Direct_Shortwave_Radiation 182448 non-null float64 11 Diffuse_Shortwave_Radiation 182448 non-null float64 12 Evptrp 182448 non-null float64 13 FAO_Ref_Evptrp_2m 182448 non-null float64 14 CAPE 182448 non-null int64 15 Wind_Gust 182448 non-null float64 16 Wind_Speed_10m 182448 non-null float64 17 Wind_Dir_10m 182448 non-null float64 18 Wind_Speed_80m 182448 non-null float64 19 Wind_Dir_80m 182448 non-null float64 20 Wind_Speed_900mb 182448 non-null float64 21 Wind_Dir_900mb 182448 non-null float64 22 Wind_Speed_850mb 182448 non-null float64 23 Wind_Dir_850mb 182448 non-null float64 24 Wind_Speed_700_mb] 182448 non-null float64 25 Wind_Dir_700mb 182448 non-null float64 26 Wind_Speed_500mb 182448 non-null float64 27 Wind_Dir_500mb 182448 non-null float64 28 GP_Height_1000mb 182448 non-null int64 29 GP_Height_850mb 182448 non-null int64 30 GP_Height_700mb 182448 non-null int64 31 GP_Height_500mb 182448 non-null int64 32 Temp_1000mb 182448 non-null float64 33 Temp_850mb 182448 non-null float64 34 Temp_700mb 182448 non-null float64 35 Temp 182448 non-null float64 36 Soil_Temp 182448 non-null float64 37 Soil_Moisture 182448 non-null float64 38 Vapor_Pressure_Deficit_2m 182448 non-null float64 39 day 182448 non-null int64 40 year 182448 non-null int64 41 month 182448 non-null int64 42 hour_of_day 182448 non-null int64 dtypes: float64(30), int64(13) memory usage: 59.9 MB
#check for any missing or null values in the features and labels.
data.isnull().sum()
Temp_2m 0 RH_2m 0 MSL_Pressure 0 Precipitation_Total 0 total_cld_lay 0 high_cld_lay 0 mid_cld_lay 0 low_cld_lay 0 Sunshine_Duration 0 Shortwave_Radiation 0 Direct_Shortwave_Radiation 0 Diffuse_Shortwave_Radiation 0 Evptrp 0 FAO_Ref_Evptrp_2m 0 CAPE 0 Wind_Gust 0 Wind_Speed_10m 0 Wind_Dir_10m 0 Wind_Speed_80m 0 Wind_Dir_80m 0 Wind_Speed_900mb 0 Wind_Dir_900mb 0 Wind_Speed_850mb 0 Wind_Dir_850mb 0 Wind_Speed_700_mb] 0 Wind_Dir_700mb 0 Wind_Speed_500mb 0 Wind_Dir_500mb 0 GP_Height_1000mb 0 GP_Height_850mb 0 GP_Height_700mb 0 GP_Height_500mb 0 Temp_1000mb 0 Temp_850mb 0 Temp_700mb 0 Temp 0 Soil_Temp 0 Soil_Moisture 0 Vapor_Pressure_Deficit_2m 0 day 0 year 0 month 0 hour_of_day 0 dtype: int64
#drop any duplicate records from the data
data = data.drop_duplicates()
data.shape
(182448, 43)
data.describe()
| Temp_2m | RH_2m | MSL_Pressure | Precipitation_Total | total_cld_lay | high_cld_lay | mid_cld_lay | low_cld_lay | Sunshine_Duration | Shortwave_Radiation | Direct_Shortwave_Radiation | Diffuse_Shortwave_Radiation | Evptrp | FAO_Ref_Evptrp_2m | CAPE | Wind_Gust | Wind_Speed_10m | Wind_Dir_10m | Wind_Speed_80m | Wind_Dir_80m | Wind_Speed_900mb | Wind_Dir_900mb | Wind_Speed_850mb | Wind_Dir_850mb | Wind_Speed_700_mb] | Wind_Dir_700mb | Wind_Speed_500mb | Wind_Dir_500mb | GP_Height_1000mb | GP_Height_850mb | GP_Height_700mb | GP_Height_500mb | Temp_1000mb | Temp_850mb | Temp_700mb | Temp | Soil_Temp | Soil_Moisture | Vapor_Pressure_Deficit_2m | day | year | month | hour_of_day | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| count | 182448.00 | 182448.00 | 182448.00 | 182448.00 | 182448.00 | 182448.00 | 182448.00 | 182448.00 | 182448.00 | 182448.00 | 182448.00 | 182448.00 | 1.82e+05 | 1.82e+05 | 182448.00 | 182448.00 | 182448.00 | 182448.00 | 182448.00 | 182448.00 | 182448.00 | 182448.00 | 182448.00 | 182448.00 | 182448.00 | 182448.00 | 182448.00 | 182448.00 | 182448.00 | 182448.00 | 182448.00 | 182448.00 | 182448.00 | 182448.00 | 182448.00 | 182448.00 | 182448.00 | 182448.00 | 1.82e+05 | 182448.00 | 182448.00 | 182448.00 | 182448.00 |
| mean | 22.13 | 72.87 | 1017.76 | 0.07 | 41.77 | 25.71 | 24.06 | 28.87 | 16.81 | 182.82 | 105.68 | 77.14 | 9.59e-02 | 1.48e-01 | 327.79 | 18.44 | 12.27 | 160.23 | 18.54 | 162.79 | 23.08 | 180.52 | 23.39 | 194.10 | 29.88 | 217.07 | 45.21 | 234.73 | 151.17 | 1541.25 | 3154.69 | 5833.44 | 21.83 | 13.35 | 6.11 | 22.94 | 22.75 | 0.16 | 8.41e+00 | 15.72 | 2009.91 | 6.48 | 11.50 |
| std | 6.28 | 17.89 | 4.37 | 0.38 | 42.98 | 39.56 | 39.70 | 39.69 | 23.97 | 252.53 | 171.48 | 96.07 | 1.27e-01 | 1.85e-01 | 418.77 | 8.85 | 6.67 | 95.91 | 8.14 | 95.12 | 15.07 | 91.57 | 15.71 | 93.05 | 21.28 | 88.86 | 33.49 | 80.82 | 35.51 | 33.16 | 43.07 | 65.12 | 5.58 | 3.92 | 2.90 | 7.70 | 5.94 | 0.03 | 7.28e+00 | 8.80 | 6.01 | 3.43 | 6.92 |
| min | -6.25 | 14.00 | 975.60 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | -13.46 | 0.00 | 0.00e+00 | 0.00e+00 | 0.00 | 0.00 | 0.00 | 0.60 | 0.00 | 0.56 | 0.00 | 0.28 | 0.00 | 0.31 | 0.00 | 0.34 | 0.00 | 0.21 | -207.00 | 1202.00 | 2866.00 | 5409.00 | -6.79 | -7.68 | -12.75 | -6.56 | 0.58 | 0.04 | -1.91e-06 | 1.00 | 2000.00 | 1.00 | 0.00 |
| 25% | 18.45 | 59.00 | 1014.90 | 0.00 | 0.90 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 1.44e-03 | 5.41e-03 | 0.00 | 11.88 | 7.29 | 81.87 | 12.74 | 85.07 | 11.97 | 100.84 | 12.02 | 112.11 | 14.34 | 163.30 | 19.32 | 217.18 | 129.00 | 1523.00 | 3132.00 | 5795.00 | 18.65 | 11.16 | 4.33 | 18.42 | 19.18 | 0.15 | 2.48e+00 | 8.00 | 2005.00 | 4.00 | 5.75 |
| 50% | 23.04 | 75.00 | 1017.50 | 0.00 | 16.20 | 0.00 | 0.00 | 10.00 | 0.00 | 15.13 | 2.05 | 6.86 | 1.87e-02 | 5.07e-02 | 122.00 | 18.00 | 10.83 | 138.01 | 18.71 | 139.30 | 20.09 | 185.03 | 20.07 | 213.69 | 24.27 | 243.43 | 35.65 | 257.47 | 150.00 | 1545.00 | 3161.00 | 5846.00 | 22.92 | 14.15 | 6.70 | 23.02 | 23.61 | 0.17 | 6.30e+00 | 16.00 | 2010.00 | 6.00 | 11.50 |
| 75% | 26.66 | 89.00 | 1020.40 | 0.00 | 100.00 | 41.00 | 48.00 | 67.00 | 43.15 | 316.84 | 157.47 | 158.89 | 1.67e-01 | 2.51e-01 | 562.00 | 23.40 | 16.20 | 243.43 | 23.77 | 248.96 | 30.86 | 254.17 | 30.93 | 266.19 | 40.16 | 279.21 | 64.67 | 283.39 | 173.00 | 1564.00 | 3185.00 | 5884.00 | 25.89 | 16.48 | 8.29 | 28.01 | 26.86 | 0.18 | 1.27e+01 | 23.00 | 2015.00 | 9.00 | 17.25 |
| max | 39.00 | 100.00 | 1037.10 | 27.60 | 100.00 | 100.00 | 100.00 | 100.00 | 60.00 | 996.80 | 690.80 | 340.88 | 6.15e-01 | 9.10e-01 | 4277.00 | 104.40 | 78.41 | 360.00 | 104.52 | 360.00 | 173.22 | 360.00 | 182.76 | 360.00 | 176.24 | 360.00 | 228.53 | 360.00 | 298.00 | 1651.00 | 3268.00 | 5970.00 | 36.30 | 22.08 | 17.81 | 48.80 | 39.42 | 0.30 | 5.47e+01 | 31.00 | 2020.00 | 12.00 | 23.00 |
sns.set(rc={'figure.figsize':(17,17)})
# Calculate correlations
correlations = data.corr()
# Print the Correlation Heatmap using seaborn
sns.heatmap(correlations)
<AxesSubplot:>
# Dividing dataset into label and feature sets
#features = data_modified.drop(['Temp_2m','RH_2m'], axis = 1) # Features
#labels = data_modified[['Temp_2m','RH_2m']] # Labels
features = data.drop(['Temp_2m','RH_2m'], axis = 1) # Features
labels = data[['Temp_2m','RH_2m']] # Labels
print(type(features))
print(type(labels))
print(features.shape)
print(labels.shape)
<class 'pandas.core.frame.DataFrame'> <class 'pandas.core.frame.DataFrame'> (182448, 41) (182448, 2)
# feature importance estimation to see which features can help in better training of the model.
feature_importance = DecisionTreeRegressor()
feature_importance.fit(features,labels)
print(feature_importance.feature_importances_)
for feat, importance in zip(features.columns, feature_importance.feature_importances_):
print ('feature: {f}, importance: {i}'.format(f=feat, i=importance))
[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.032 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.123 0.008 0. 0.041 0.001 0. 0.791 0. 0. 0.001 0. ] feature: MSL_Pressure, importance: 5.9178574476959616e-05 feature: Precipitation_Total, importance: 4.97035440399459e-06 feature: total_cld_lay, importance: 2.1617564243998973e-05 feature: high_cld_lay, importance: 1.2584614786614461e-05 feature: mid_cld_lay, importance: 8.72101021209456e-06 feature: low_cld_lay, importance: 4.603057530062024e-05 feature: Sunshine_Duration, importance: 4.1262250202964235e-05 feature: Shortwave_Radiation, importance: 5.055077511726679e-05 feature: Direct_Shortwave_Radiation, importance: 7.946729741094226e-05 feature: Diffuse_Shortwave_Radiation, importance: 6.044691321479496e-05 feature: Evptrp, importance: 7.510901172714087e-05 feature: FAO_Ref_Evptrp_2m, importance: 0.00033593825538319835 feature: CAPE, importance: 0.03195203888317169 feature: Wind_Gust, importance: 8.925587468961249e-05 feature: Wind_Speed_10m, importance: 9.313587077140822e-05 feature: Wind_Dir_10m, importance: 6.275760355783387e-05 feature: Wind_Speed_80m, importance: 0.00011505251666989408 feature: Wind_Dir_80m, importance: 7.292324593609635e-05 feature: Wind_Speed_900mb, importance: 6.133734828924194e-05 feature: Wind_Dir_900mb, importance: 7.542159873001992e-05 feature: Wind_Speed_850mb, importance: 5.745048623413823e-05 feature: Wind_Dir_850mb, importance: 6.785887433323778e-05 feature: Wind_Speed_700_mb], importance: 7.47255042725375e-05 feature: Wind_Dir_700mb, importance: 6.1519491770601e-05 feature: Wind_Speed_500mb, importance: 7.702519217232532e-05 feature: Wind_Dir_500mb, importance: 6.097164255501906e-05 feature: GP_Height_1000mb, importance: 6.787157428717148e-05 feature: GP_Height_850mb, importance: 9.765918256131429e-05 feature: GP_Height_700mb, importance: 0.00010582403696931768 feature: GP_Height_500mb, importance: 0.00034854497329230293 feature: Temp_1000mb, importance: 0.12312103474001601 feature: Temp_850mb, importance: 0.008096429456201527 feature: Temp_700mb, importance: 0.0004813106283136684 feature: Temp, importance: 0.04065489481891291 feature: Soil_Temp, importance: 0.0012342726773117902 feature: Soil_Moisture, importance: 0.00024056637395156551 feature: Vapor_Pressure_Deficit_2m, importance: 0.7907300616659558 feature: day, importance: 6.334186764938379e-05 feature: year, importance: 6.298015763046619e-05 feature: month, importance: 0.0009246175262858175 feature: hour_of_day, importance: 5.3238991026877485e-05
#split the data for training and testing
trainf,testf,trainl,testl=train_test_split(features,labels, test_size=0.15, random_state=22)
print(len(trainf))
print(len(trainl))
print(len(testf))
print(len(testl))
print("trainf:", trainf.shape, "trianl:", trainl.shape)
print("testf:", testf.shape, "testl:", testl.shape)
#Conver the pandas dataframes into arrays for train and test features
trainf_array = trainf.to_numpy()
trainl_array = trainl.to_numpy()
testf_array = testf.to_numpy()
testl_array = testl.to_numpy()
print("trainf_array:", trainf_array.shape, "trianl_array:", trainl_array.shape)
print("testf_array:", testf_array.shape, "testl_array:", testl_array.shape)
155080 155080 27368 27368 trainf: (155080, 41) trianl: (155080, 2) testf: (27368, 41) testl: (27368, 2) trainf_array: (155080, 41) trianl_array: (155080, 2) testf_array: (27368, 41) testl_array: (27368, 2)
#Apply normalization on the training and testing feature sets
scaler = StandardScaler()
# transform data
trainf_array_scaled = scaler.fit_transform(trainf_array)
testf_array_scaled = scaler.fit_transform(testf_array)
set_config(print_changed_only=False)
model_LR = LinearRegression(normalize=True)
model_LR.fit(trainf_array,trainl_array)
LRpred = model_LR.predict(testf_array)
print(r2_score(LRpred, testl_array))
0.9575483230709314
#Model Evaluation
mse_temp = mean_squared_error(testl_array[:,0], LRpred[:,0])
print("Mean Squared Error for Temperature is : ", mse_temp)
mse_humidity = mean_squared_error(testl_array[:,1], LRpred[:,1])
print(" Mean Squared Error for Relative Humidity is : ", mse_humidity)
rmse_temp = math.sqrt(mean_squared_error(testl_array[:,0], LRpred[:,0]))
print("Root Mean Squared Error for Temperature is : ", rmse_temp)
rmse_humidity = math.sqrt(mean_squared_error(testl_array[:,1], LRpred[:,1]))
print("Root Mean Squared Error for Relative Humidity is : ", rmse_humidity)
exp_var_score_temp = explained_variance_score(testl_array[:,0], LRpred[:,0])
print("The Explained variance score Temperature is {}".format(exp_var_score_temp))
exp_var_score_humidity = explained_variance_score(testl_array[:,1], LRpred[:,1])
print("The Explained variance score in terms of Relative Humidity is {}".format(exp_var_score_humidity))
mae_temp = mean_absolute_error(testl_array[:,0], LRpred[:,0])
print("Explained Variance Score for Temperature is ", mae_temp)
mae_hum = mean_absolute_error(testl_array[:,1], LRpred[:,1])
print("Explained Variance Score for Relative Humidity is ", mae_hum)
def mean_absolute_percentage_error(y_true, y_pred):
y_true, y_pred = np.array(y_true), np.array(y_pred)
return np.mean(np.abs((y_true - y_pred) / (y_true))) * 100
mean_abs_perc_error_temp = mean_absolute_percentage_error(testl_array[:,0], LRpred[:,0])
print("The mean absolute percentage error for Temperature is" ,mean_abs_perc_error_temp)
mean_abs_perc_error_humidity = mean_absolute_percentage_error(testl_array[:,1], LRpred[:,1])
print("The mean absolute percentage error for Relative Humidity is" ,mean_abs_perc_error_humidity)
Mean Squared Error for Temperature is : 0.2053901206852193 Mean Squared Error for Relative Humidity is : 23.611172413791728 Root Mean Squared Error for Temperature is : 0.4531998683640798 Root Mean Squared Error for Relative Humidity is : 4.85913288702745 The Explained variance score Temperature is 0.9948201836109379 The Explained variance score in terms of Relative Humidity is 0.9259200468779117 Explained Variance Score for Temperature is 0.3312862931946686 Explained Variance Score for Relative Humidity is 3.348559790511139 The mean absolute percentage error for Temperature is 3.2971421473688083 The mean absolute percentage error for Relative Humidity is 5.456512698160341
#Create a pandas dataframe to store the metrics of different models performed.
ModelMetrics = pd.DataFrame()
ModelMetrics= ModelMetrics.append([['Linear Regression',mse_temp,mse_humidity,rmse_temp,rmse_humidity,exp_var_score_temp,exp_var_score_humidity,mae_temp,mae_hum,mean_abs_perc_error_temp,mean_abs_perc_error_humidity]])
#ModelMetrics.columns= ['ModelName','MSE-Temp','MSE-RH','RMSE-Temp','RMSE-RH','R2Score-Temp','R2Score-RH','MAE-Temp','MAE-RH','MAPE-Temp','MAPE-RH']
ModelMetrics
| 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Linear Regression | 0.21 | 23.61 | 0.45 | 4.86 | 0.99 | 0.93 | 0.33 | 3.35 | 3.3 | 5.46 |
#Temperature Prediction
plt.plot(range(0,100), testl_array[0:100,0], label="Actual_Temp", color='r')
plt.scatter(range(0,100), LRpred[0:100,0], label="Predicted_Temp", color='b')
plt.xlabel('Index')
plt.ylabel('Temperature in Degrees Celcius')
plt.legend()
plt.show()
#Humidity Prediction
plt.plot(range(0,100), testl_array[0:100,1], label="Actual_Humidity", color='g')
plt.scatter(range(0,100), LRpred[0:100,1], label="Predicted_Humidity", color='y')
plt.xlabel('Index')
plt.ylabel('Percentage Humidity')
plt.legend()
plt.show()
#Temperature Scatter Plot for predictions
plt.scatter(testl_array[:,0], LRpred[:,0])
plt.ylabel("Predicted Temperatures")
plt.xlabel("Test Temperatures")
plt.title(" Predicted Vs. Test Temperature")
plt.show()
#RH Scatter Plot for predictions
plt.scatter(testl_array[:,1], LRpred[:,1])
plt.ylabel("Predicted RH")
plt.xlabel("Test RH")
plt.title(" Predicted Vs. Test RH")
plt.show()
model_KNN = KNeighborsRegressor()
model_KNN.fit(trainf_array_scaled,trainl_array)
KNeighborsRegressor(algorithm='auto', leaf_size=30, metric='minkowski',
metric_params=None, n_jobs=None, n_neighbors=5, p=2,
weights='uniform')
KNNpred = model_KNN.predict(testf_array_scaled)
#Model Evaluation
mse_temp = mean_squared_error(testl_array[:,0], KNNpred[:,0])
print("Mean Squared Error for Temperature is : ", mse_temp)
mse_humidity = mean_squared_error(testl_array[:,1], KNNpred[:,1])
print(" Mean Squared Error for Relative Humidity is : ", mse_humidity)
rmse_temp = math.sqrt(mean_squared_error(testl_array[:,0], KNNpred[:,0]))
print("Root Mean Squared Error for Temperature is : ", rmse_temp)
rmse_humidity = math.sqrt(mean_squared_error(testl_array[:,1], KNNpred[:,1]))
print("Root Mean Squared Error for Relative Humidity is : ", rmse_humidity)
exp_var_score_temp = explained_variance_score(testl_array[:,0], KNNpred[:,0])
print("The Explained variance score Temperature is {}".format(exp_var_score_temp))
exp_var_score_humidity = explained_variance_score(testl_array[:,1], KNNpred[:,1])
print("The Explained variance score in terms of Relative Humidity is {}".format(exp_var_score_humidity))
mae_temp = mean_absolute_error(testl_array[:,0], KNNpred[:,0])
print("Explained Variance Score for Temperature is ", mae_temp)
mae_hum = mean_absolute_error(testl_array[:,1], KNNpred[:,1])
print("Explained Variance Score for Relative Humidity is ", mae_hum)
mean_abs_perc_error_temp = mean_absolute_percentage_error(testl_array[:,0], KNNpred[:,0])
print("The mean absolute percentage error for Temperature is" ,mean_abs_perc_error_temp)
mean_abs_perc_error_humidity = mean_absolute_percentage_error(testl_array[:,1], KNNpred[:,1])
print("The mean absolute percentage error for Relative Humidity is" ,mean_abs_perc_error_humidity)
Mean Squared Error for Temperature is : 0.8755923556772365 Mean Squared Error for Relative Humidity is : 21.92111663256358 Root Mean Squared Error for Temperature is : 0.9357309205520765 Root Mean Squared Error for Relative Humidity is : 4.681999213216891 The Explained variance score Temperature is 0.9779243848328526 The Explained variance score in terms of Relative Humidity is 0.9313580480958321 Explained Variance Score for Temperature is 0.6861658010998319 Explained Variance Score for Relative Humidity is 3.3315697164571763 The mean absolute percentage error for Temperature is 5.813117037860256 The mean absolute percentage error for Relative Humidity is 5.079609244263588
ModelMetrics= ModelMetrics.append([['KNN Regression',mse_temp,mse_humidity,rmse_temp,rmse_humidity,exp_var_score_temp,exp_var_score_humidity,mae_temp,mae_hum,mean_abs_perc_error_temp,mean_abs_perc_error_humidity]])
ModelMetrics
| 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Linear Regression | 0.21 | 23.61 | 0.45 | 4.86 | 0.99 | 0.93 | 0.33 | 3.35 | 3.30 | 5.46 |
| 0 | KNN Regression | 0.88 | 21.92 | 0.94 | 4.68 | 0.98 | 0.93 | 0.69 | 3.33 | 5.81 | 5.08 |
#Temperature Prediction
plt.plot(range(0,100), testl_array[0:100,0], label="Actual_Temp", color='r')
plt.scatter(range(0,100), KNNpred[0:100,0], label="Predicted_Temp", color='b')
plt.xlabel('Index')
plt.ylabel('Temperature in Degrees Celcius')
plt.legend()
plt.show()
#RH Prediction
plt.plot(range(0,100), testl_array[0:100,1], label="Actual_Humidity", color='g')
plt.scatter(range(0,100), KNNpred[0:100,1], label="Predicted_Humidity", color='y')
plt.xlabel('Index')
plt.ylabel('Percentage Humidity')
plt.legend()
plt.show()
#Temperature Scatter Plot for predictions
plt.scatter(testl_array[:,0], KNNpred[:,0])
plt.ylabel("Predicted Temperatures")
plt.xlabel("Test Temperatures")
plt.title(" Predicted Vs. Test Temperature")
plt.show()
#RH Scatter Plot for predictions
plt.scatter(testl_array[:,1], KNNpred[:,1])
plt.ylabel("Predicted RH")
plt.xlabel("Test RH")
plt.title(" Predicted Vs. Test RH")
plt.show()
model_SVR = LinearSVR()
wrapper_SVR = MultiOutputRegressor(model_SVR)
wrapper_SVR.fit(trainf_array_scaled,trainl_array)
C:\Users\ravip\anaconda3\lib\site-packages\sklearn\svm\_base.py:976: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.
warnings.warn("Liblinear failed to converge, increase "
MultiOutputRegressor(estimator=LinearSVR(C=1.0, dual=True, epsilon=0.0,
fit_intercept=True,
intercept_scaling=1.0,
loss='epsilon_insensitive',
max_iter=1000, random_state=None,
tol=0.0001, verbose=0),
n_jobs=None)
SVRpred = wrapper_SVR.predict(testf_array_scaled)
#Model Evaluation
mse_temp = mean_squared_error(testl_array[:,0], SVRpred[:,0])
print("Mean Squared Error for Temperature is : ", mse_temp)
mse_humidity = mean_squared_error(testl_array[:,1], SVRpred[:,1])
print("Mean Squared Error for Relative Humidity is : ", mse_humidity)
rmse_temp = math.sqrt(mean_squared_error(testl_array[:,0], SVRpred[:,0]))
print("Root Mean Squared Error for Temperature is : ", rmse_temp)
rmse_humidity = math.sqrt(mean_squared_error(testl_array[:,1], SVRpred[:,1]))
print("Root Mean Squared Error for Relative Humidity is : ", rmse_humidity)
exp_var_score_temp = explained_variance_score(testl_array[:,0], SVRpred[:,0])
print("The Explained variance score Temperature is {}".format(exp_var_score_temp))
exp_var_score_humidity = explained_variance_score(testl_array[:,1], SVRpred[:,1])
print("The Explained variance score in terms of Relative Humidity is {}".format(exp_var_score_humidity))
mae_temp = mean_absolute_error(testl_array[:,0], SVRpred[:,0])
print("Explained Variance Score for Temperature is ", mae_temp)
mae_hum = mean_absolute_error(testl_array[:,1], SVRpred[:,1])
print("Explained Variance Score for Relative Humidity is ", mae_hum)
mean_abs_perc_error_temp = mean_absolute_percentage_error(testl_array[:,0], SVRpred[:,0])
print("The mean absolute percentage error for Temperature is" ,mean_abs_perc_error_temp)
mean_abs_perc_error_humidity = mean_absolute_percentage_error(testl_array[:,1], SVRpred[:,1])
print("The mean absolute percentage error for Relative Humidity is" ,mean_abs_perc_error_humidity)
Mean Squared Error for Temperature is : 0.21813062283558748 Mean Squared Error for Relative Humidity is : 24.599737729617775 Root Mean Squared Error for Temperature is : 0.46704456193771005 Root Mean Squared Error for Relative Humidity is : 4.959812267578056 The Explained variance score Temperature is 0.9945436488807007 The Explained variance score in terms of Relative Humidity is 0.922915751950394 Explained Variance Score for Temperature is 0.3257398925780984 Explained Variance Score for Relative Humidity is 3.2621434549466137 The mean absolute percentage error for Temperature is 3.6084152629758677 The mean absolute percentage error for Relative Humidity is 5.504315743327771
ModelMetrics= ModelMetrics.append([['SVR-Multioutput',mse_temp,mse_humidity,rmse_temp,rmse_humidity,exp_var_score_temp,exp_var_score_humidity,mae_temp,mae_hum,mean_abs_perc_error_temp,mean_abs_perc_error_humidity]])
ModelMetrics
| 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Linear Regression | 0.21 | 23.61 | 0.45 | 4.86 | 0.99 | 0.93 | 0.33 | 3.35 | 3.30 | 5.46 |
| 0 | KNN Regression | 0.88 | 21.92 | 0.94 | 4.68 | 0.98 | 0.93 | 0.69 | 3.33 | 5.81 | 5.08 |
| 0 | SVR-Multioutput | 0.22 | 24.60 | 0.47 | 4.96 | 0.99 | 0.92 | 0.33 | 3.26 | 3.61 | 5.50 |
#Temperature Prediction
plt.plot(range(0,100), testl_array[0:100,0], label="Actual_Temp", color='r')
plt.scatter(range(0,100), SVRpred[0:100,0], label="Predicted_Temp", color='b')
plt.xlabel('Index')
plt.ylabel('Temperature in Degrees Celcius')
plt.legend()
plt.show()
#RH Prediction
plt.plot(range(0,100), testl_array[0:100,1], label="Actual_Humidity", color='g')
plt.scatter(range(0,100), SVRpred[0:100,1], label="Predicted_Humidity", color='y')
plt.xlabel('Index')
plt.ylabel('Percentage Humidity')
plt.legend()
plt.show()
#Temperature Scatter Plot for predictions
plt.scatter(testl_array[:,0], SVRpred[:,0])
plt.ylabel("Predicted Temperatures")
plt.xlabel("Test Temperatures")
plt.title(" Predicted Vs. Test Temperature")
plt.show()
#RH Scatter Plot for predictions
plt.scatter(testl_array[:,1], SVRpred[:,1])
plt.ylabel("Predicted RH")
plt.xlabel("Test RH")
plt.title(" Predicted Vs. Test RH")
plt.show()
wrapper_RCSVR=RegressorChain(model_SVR)
wrapper_RCSVR.fit(trainf_array_scaled,trainl_array)
C:\Users\ravip\anaconda3\lib\site-packages\sklearn\svm\_base.py:976: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.
warnings.warn("Liblinear failed to converge, increase "
C:\Users\ravip\anaconda3\lib\site-packages\sklearn\svm\_base.py:976: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.
warnings.warn("Liblinear failed to converge, increase "
RegressorChain(base_estimator=LinearSVR(C=1.0, dual=True, epsilon=0.0,
fit_intercept=True,
intercept_scaling=1.0,
loss='epsilon_insensitive',
max_iter=1000, random_state=None,
tol=0.0001, verbose=0),
cv=None, order=None, random_state=None)
RCSVRpred = wrapper_RCSVR.predict(testf_array_scaled)
#Model Evaluation
mse_temp = mean_squared_error(testl_array[:,0], RCSVRpred[:,0])
print("Mean Squared Error for Temperature is : ", mse_temp)
mse_humidity = mean_squared_error(testl_array[:,1], RCSVRpred[:,1])
print("Mean Squared Error for Relative Humidity is : ", mse_humidity)
rmse_temp = math.sqrt(mean_squared_error(testl_array[:,0], RCSVRpred[:,0]))
print("Root Mean Squared Error for Temperature is : ", rmse_temp)
rmse_humidity = math.sqrt(mean_squared_error(testl_array[:,1], RCSVRpred[:,1]))
print("Root Mean Squared Error for Relative Humidity is : ", rmse_humidity)
exp_var_score_temp = explained_variance_score(testl_array[:,0], RCSVRpred[:,0])
print("The Explained variance score Temperature is {}".format(exp_var_score_temp))
exp_var_score_humidity = explained_variance_score(testl_array[:,1], RCSVRpred[:,1])
print("The Explained variance score in terms of Relative Humidity is {}".format(exp_var_score_humidity))
mae_temp = mean_absolute_error(testl_array[:,0], RCSVRpred[:,0])
print("Explained Variance Score for Temperature is ", mae_temp)
mae_hum = mean_absolute_error(testl_array[:,1], RCSVRpred[:,1])
print("Explained Variance Score for Relative Humidity is ", mae_hum)
mean_abs_perc_error_temp = mean_absolute_percentage_error(testl_array[:,0], RCSVRpred[:,0])
print("The mean absolute percentage error for Temperature is" ,mean_abs_perc_error_temp)
mean_abs_perc_error_humidity = mean_absolute_percentage_error(testl_array[:,1], RCSVRpred[:,1])
print("The mean absolute percentage error for Relative Humidity is" ,mean_abs_perc_error_humidity)
Mean Squared Error for Temperature is : 0.2174440033688633 Mean Squared Error for Relative Humidity is : 24.643339368063298 Root Mean Squared Error for Temperature is : 0.46630891409972347 Root Mean Squared Error for Relative Humidity is : 4.964205814434298 The Explained variance score Temperature is 0.9945497698440212 The Explained variance score in terms of Relative Humidity is 0.9229126033007371 Explained Variance Score for Temperature is 0.32637323127021617 Explained Variance Score for Relative Humidity is 3.2724205740536103 The mean absolute percentage error for Temperature is 3.5828899511728864 The mean absolute percentage error for Relative Humidity is 5.487541557266725
ModelMetrics= ModelMetrics.append([['SVR-RegressorChain',mse_temp,mse_humidity,rmse_temp,rmse_humidity,exp_var_score_temp,exp_var_score_humidity,mae_temp,mae_hum,mean_abs_perc_error_temp,mean_abs_perc_error_humidity]])
ModelMetrics
| 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Linear Regression | 0.21 | 23.61 | 0.45 | 4.86 | 0.99 | 0.93 | 0.33 | 3.35 | 3.30 | 5.46 |
| 0 | KNN Regression | 0.88 | 21.92 | 0.94 | 4.68 | 0.98 | 0.93 | 0.69 | 3.33 | 5.81 | 5.08 |
| 0 | SVR-Multioutput | 0.22 | 24.60 | 0.47 | 4.96 | 0.99 | 0.92 | 0.33 | 3.26 | 3.61 | 5.50 |
| 0 | SVR-RegressorChain | 0.22 | 24.64 | 0.47 | 4.96 | 0.99 | 0.92 | 0.33 | 3.27 | 3.58 | 5.49 |
#Temperature Prediction
plt.plot(range(0,100), testl_array[0:100,0], label="Actual_Temp", color='r')
plt.scatter(range(0,100), RCSVRpred[0:100,0], label="Predicted_Temp", color='b')
plt.xlabel('Index')
plt.ylabel('Temperature in Degrees Celcius')
plt.legend()
plt.show()
#RH Prediction
plt.plot(range(0,100), testl_array[0:100,1], label="Actual_Humidity", color='g')
plt.scatter(range(0,100), RCSVRpred[0:100,1], label="Predicted_Humidity", color='y')
plt.xlabel('Index')
plt.ylabel('Percentage Humidity')
plt.legend()
plt.show()
#Temperature Scatter Plot for predictions
plt.scatter(testl_array[:,0], RCSVRpred[:,0])
plt.ylabel("Predicted Temperatures")
plt.xlabel("Test Temperatures")
plt.title(" Predicted Vs. Test Temperature")
plt.show()
#RH Scatter Plot for predictions
plt.scatter(testl_array[:,1], RCSVRpred[:,1])
plt.ylabel("Predicted RH")
plt.xlabel("Test RH")
plt.title(" Predicted Vs. Test RH")
plt.show()
model_DT = DecisionTreeRegressor()
model_DT.fit(trainf_array,trainl_array)
DecisionTreeRegressor(ccp_alpha=0.0, criterion='mse', max_depth=None,
max_features=None, max_leaf_nodes=None,
min_impurity_decrease=0.0, min_impurity_split=None,
min_samples_leaf=1, min_samples_split=2,
min_weight_fraction_leaf=0.0, presort='deprecated',
random_state=None, splitter='best')
DTpred = model_DT.predict(testf_array)
#Model Evaluation
mse_temp = mean_squared_error(testl_array[:,0], DTpred[:,0])
print("Mean Squared Error for Temperature is : ", mse_temp)
mse_humidity = mean_squared_error(testl_array[:,1], DTpred[:,1])
print("Mean Squared Error for Relative Humidity is : ", mse_humidity)
rmse_temp = math.sqrt(mean_squared_error(testl_array[:,0], DTpred[:,0]))
print("Root Mean Squared Error for Temperature is : ", rmse_temp)
rmse_humidity = math.sqrt(mean_squared_error(testl_array[:,1], DTpred[:,1]))
print("Root Mean Squared Error for Relative Humidity is : ", rmse_humidity)
exp_var_score_temp = explained_variance_score(testl_array[:,0], DTpred[:,0])
print("The Explained variance score Temperature is {}".format(exp_var_score_temp))
exp_var_score_humidity = explained_variance_score(testl_array[:,1], DTpred[:,1])
print("The Explained variance score in terms of Relative Humidity is {}".format(exp_var_score_humidity))
mae_temp = mean_absolute_error(testl_array[:,0], DTpred[:,0])
print("Explained Variance Score for Temperature is ", mae_temp)
mae_hum = mean_absolute_error(testl_array[:,1], DTpred[:,1])
print("Explained Variance Score for Relative Humidity is ", mae_hum)
mean_abs_perc_error_temp = mean_absolute_percentage_error(testl_array[:,0], DTpred[:,0])
print("The mean absolute percentage error for Temperature is" ,mean_abs_perc_error_temp)
mean_abs_perc_error_humidity = mean_absolute_percentage_error(testl_array[:,1], DTpred[:,1])
print("The mean absolute percentage error for Relative Humidity is" ,mean_abs_perc_error_humidity)
Mean Squared Error for Temperature is : 0.32774279562206793 Mean Squared Error for Relative Humidity is : 1.5895936860567086 Root Mean Squared Error for Temperature is : 0.5724882493309954 Root Mean Squared Error for Relative Humidity is : 1.26079089703912 The Explained variance score Temperature is 0.9917347219801436 The Explained variance score in terms of Relative Humidity is 0.9950123379544541 Explained Variance Score for Temperature is 0.34247257538000586 Explained Variance Score for Relative Humidity is 0.6624524992692196 The mean absolute percentage error for Temperature is 3.1390522502086773 The mean absolute percentage error for Relative Humidity is 1.2082709443972117
ModelMetrics= ModelMetrics.append([['Decison Tree Regressor',mse_temp,mse_humidity,rmse_temp,rmse_humidity,exp_var_score_temp,exp_var_score_humidity,mae_temp,mae_hum,mean_abs_perc_error_temp,mean_abs_perc_error_humidity]])
ModelMetrics
| 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Linear Regression | 0.21 | 23.61 | 0.45 | 4.86 | 0.99 | 0.93 | 0.33 | 3.35 | 3.30 | 5.46 |
| 0 | KNN Regression | 0.88 | 21.92 | 0.94 | 4.68 | 0.98 | 0.93 | 0.69 | 3.33 | 5.81 | 5.08 |
| 0 | SVR-Multioutput | 0.22 | 24.60 | 0.47 | 4.96 | 0.99 | 0.92 | 0.33 | 3.26 | 3.61 | 5.50 |
| 0 | SVR-RegressorChain | 0.22 | 24.64 | 0.47 | 4.96 | 0.99 | 0.92 | 0.33 | 3.27 | 3.58 | 5.49 |
| 0 | Decison Tree Regressor | 0.33 | 1.59 | 0.57 | 1.26 | 0.99 | 1.00 | 0.34 | 0.66 | 3.14 | 1.21 |
#Temperature Prediction
plt.plot(range(0,100), testl_array[0:100,0], label="Actual_Temp", color='r')
plt.scatter(range(0,100), DTpred[0:100,0], label="Predicted_Temp", color='b')
plt.xlabel('Index')
plt.ylabel('Temperature in Degrees Celcius')
plt.legend()
plt.show()
#RH Prediction
plt.plot(range(0,100), testl_array[0:100,1], label="Actual_Humidity", color='g')
plt.scatter(range(0,100), DTpred[0:100,1], label="Predicted_Humidity", color='y')
plt.xlabel('Index')
plt.ylabel('Percentage Humidity')
plt.legend()
plt.show()
#Temperature Scatter Plot for predictions
plt.scatter(testl_array[:,0], DTpred[:,0])
plt.ylabel("Predicted Temperatures")
plt.xlabel("Test Temperatures")
plt.title(" Predicted Vs. Test Temperature")
plt.show()
#RH Scatter Plot for predictions
plt.scatter(testl_array[:,1], DTpred[:,1])
plt.ylabel("Predicted RH")
plt.xlabel("Test RH")
plt.title(" Predicted Vs. Test RH")
plt.show()
gbr = GradientBoostingRegressor()
model_gbr = MultiOutputRegressor(estimator=gbr)
model_gbr.fit(trainf_array_scaled,trainl_array)
MultiOutputRegressor(estimator=GradientBoostingRegressor(alpha=0.9,
ccp_alpha=0.0,
criterion='friedman_mse',
init=None,
learning_rate=0.1,
loss='ls', max_depth=3,
max_features=None,
max_leaf_nodes=None,
min_impurity_decrease=0.0,
min_impurity_split=None,
min_samples_leaf=1,
min_samples_split=2,
min_weight_fraction_leaf=0.0,
n_estimators=100,
n_iter_no_change=None,
presort='deprecated',
random_state=None,
subsample=1.0,
tol=0.0001,
validation_fraction=0.1,
verbose=0,
warm_start=False),
n_jobs=None)
GBRpred=model_gbr.predict(testf_array_scaled)
#Model Evaluation
mse_temp = mean_squared_error(testl_array[:,0], GBRpred[:,0])
print("Mean Squared Error for Temperature is : ", mse_temp)
mse_humidity = mean_squared_error(testl_array[:,1], GBRpred[:,1])
print("Mean Squared Error for Relative Humidity is : ", mse_humidity)
rmse_temp = math.sqrt(mean_squared_error(testl_array[:,0], GBRpred[:,0]))
print("Root Mean Squared Error for Temperature is : ", rmse_temp)
rmse_humidity = math.sqrt(mean_squared_error(testl_array[:,1], GBRpred[:,1]))
print("Root Mean Squared Error for Relative Humidity is : ", rmse_humidity)
exp_var_score_temp = explained_variance_score(testl_array[:,0], GBRpred[:,0])
print("The Explained variance score Temperature is {}".format(exp_var_score_temp))
exp_var_score_humidity = explained_variance_score(testl_array[:,1], GBRpred[:,1])
print("The Explained variance score in terms of Relative Humidity is {}".format(exp_var_score_humidity))
mae_temp = mean_absolute_error(testl_array[:,0], GBRpred[:,0])
print("Explained Variance Score for Temperature is ", mae_temp)
mae_hum = mean_absolute_error(testl_array[:,1], GBRpred[:,1])
print("Explained Variance Score for Relative Humidity is ", mae_hum)
mean_abs_perc_error_temp = mean_absolute_percentage_error(testl_array[:,0], GBRpred[:,0])
print("The mean absolute percentage error for Temperature is" ,mean_abs_perc_error_temp)
mean_abs_perc_error_humidity = mean_absolute_percentage_error(testl_array[:,1], GBRpred[:,1])
print("The mean absolute percentage error for Relative Humidity is" ,mean_abs_perc_error_humidity)
Mean Squared Error for Temperature is : 0.17083562044173928 Mean Squared Error for Relative Humidity is : 2.934639318517773 Root Mean Squared Error for Temperature is : 0.4133226589986802 Root Mean Squared Error for Relative Humidity is : 1.7130789002605142 The Explained variance score Temperature is 0.9956924634782842 The Explained variance score in terms of Relative Humidity is 0.9908027240697022 Explained Variance Score for Temperature is 0.29705702368634024 Explained Variance Score for Relative Humidity is 1.1105368710712196 The mean absolute percentage error for Temperature is 2.9951625843664065 The mean absolute percentage error for Relative Humidity is 1.956731505537482
ModelMetrics= ModelMetrics.append([['Gradient Boosted Trees',mse_temp,mse_humidity,rmse_temp,rmse_humidity,exp_var_score_temp,exp_var_score_humidity,mae_temp,mae_hum,mean_abs_perc_error_temp,mean_abs_perc_error_humidity]])
ModelMetrics
| 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Linear Regression | 0.21 | 23.61 | 0.45 | 4.86 | 0.99 | 0.93 | 0.33 | 3.35 | 3.30 | 5.46 |
| 0 | KNN Regression | 0.88 | 21.92 | 0.94 | 4.68 | 0.98 | 0.93 | 0.69 | 3.33 | 5.81 | 5.08 |
| 0 | SVR-Multioutput | 0.22 | 24.60 | 0.47 | 4.96 | 0.99 | 0.92 | 0.33 | 3.26 | 3.61 | 5.50 |
| 0 | SVR-RegressorChain | 0.22 | 24.64 | 0.47 | 4.96 | 0.99 | 0.92 | 0.33 | 3.27 | 3.58 | 5.49 |
| 0 | Decison Tree Regressor | 0.33 | 1.59 | 0.57 | 1.26 | 0.99 | 1.00 | 0.34 | 0.66 | 3.14 | 1.21 |
| 0 | Gradient Boosted Trees | 0.17 | 2.93 | 0.41 | 1.71 | 1.00 | 0.99 | 0.30 | 1.11 | 3.00 | 1.96 |
#Temperature Prediction
plt.plot(range(0,100), testl_array[0:100,0], label="Actual_Temp", color='r')
plt.scatter(range(0,100), GBRpred[0:100,0], label="Predicted_Temp", color='b')
plt.xlabel('Index')
plt.ylabel('Temperature in Degrees Celcius')
plt.legend()
plt.show()
#RH Prediction
plt.plot(range(0,100), testl_array[0:100,1], label="Actual_Humidity", color='g')
plt.scatter(range(0,100), GBRpred[0:100,1], label="Predicted_Humidity", color='y')
plt.xlabel('Index')
plt.ylabel('Percentage Humidity')
plt.legend()
plt.show()
#Temperature Scatter Plot for predictions
plt.scatter(testl_array[:,0], GBRpred[:,0])
plt.ylabel("Predicted Temperatures")
plt.xlabel("Test Temperatures")
plt.title(" Predicted Vs. Test Temperature")
plt.show()
#RH Scatter Plot for predictions
plt.scatter(testl_array[:,1], GBRpred[:,1])
plt.ylabel("Predicted RH")
plt.xlabel("Test RH")
plt.title(" Predicted Vs. Test RH")
plt.show()
model_lgb = MultiOutputRegressor(lgb.LGBMRegressor(random_state=41), n_jobs=-1)
model_lgb.fit(trainf_array_scaled, trainl_array)
MultiOutputRegressor(estimator=LGBMRegressor(boosting_type='gbdt',
class_weight=None,
colsample_bytree=1.0,
importance_type='split',
learning_rate=0.1, max_depth=-1,
min_child_samples=20,
min_child_weight=0.001,
min_split_gain=0.0,
n_estimators=100, n_jobs=-1,
num_leaves=31, objective=None,
random_state=41, reg_alpha=0.0,
reg_lambda=0.0, silent=True,
subsample=1.0,
subsample_for_bin=200000,
subsample_freq=0),
n_jobs=-1)
LGBpred = model_lgb.predict(testf_array_scaled)
#Model Evaluation
mse_temp = mean_squared_error(testl_array[:,0], LGBpred[:,0])
print("Mean Squared Error for Temperature is : ", mse_temp)
mse_humidity = mean_squared_error(testl_array[:,1], LGBpred[:,1])
print("Mean Squared Error for Relative Humidity is : ", mse_humidity)
rmse_temp = math.sqrt(mean_squared_error(testl_array[:,0], LGBpred[:,0]))
print("Root Mean Squared Error for Temperature is : ", rmse_temp)
rmse_humidity = math.sqrt(mean_squared_error(testl_array[:,1], LGBpred[:,1]))
print("Root Mean Squared Error for Relative Humidity is : ", rmse_humidity)
exp_var_score_temp = explained_variance_score(testl_array[:,0], LGBpred[:,0])
print("The Explained variance score Temperature is {}".format(exp_var_score_temp))
exp_var_score_humidity = explained_variance_score(testl_array[:,1], LGBpred[:,1])
print("The Explained variance score in terms of Relative Humidity is {}".format(exp_var_score_humidity))
mae_temp = mean_absolute_error(testl_array[:,0], LGBpred[:,0])
print("Explained Variance Score for Temperature is ", mae_temp)
mae_hum = mean_absolute_error(testl_array[:,1], LGBpred[:,1])
print("Explained Variance Score for Relative Humidity is ", mae_hum)
mean_abs_perc_error_temp = mean_absolute_percentage_error(testl_array[:,0], LGBpred[:,0])
print("The mean absolute percentage error for Temperature is" ,mean_abs_perc_error_temp)
mean_abs_perc_error_humidity = mean_absolute_percentage_error(testl_array[:,1], LGBpred[:,1])
print("The mean absolute percentage error for Relative Humidity is" ,mean_abs_perc_error_humidity)
Mean Squared Error for Temperature is : 0.07829868794607751 Mean Squared Error for Relative Humidity is : 0.8078769280297866 Root Mean Squared Error for Temperature is : 0.27981902713374857 Root Mean Squared Error for Relative Humidity is : 0.898819741677822 The Explained variance score Temperature is 0.9980254235822068 The Explained variance score in terms of Relative Humidity is 0.9974675512827723 Explained Variance Score for Temperature is 0.1918676768015798 Explained Variance Score for Relative Humidity is 0.5961751869917571 The mean absolute percentage error for Temperature is 2.73355517934084 The mean absolute percentage error for Relative Humidity is 1.0256503272653266
ModelMetrics= ModelMetrics.append([['Gradient Boosted Trees - LGB',mse_temp,mse_humidity,rmse_temp,rmse_humidity,exp_var_score_temp,exp_var_score_humidity,mae_temp,mae_hum,mean_abs_perc_error_temp,mean_abs_perc_error_humidity]])
ModelMetrics
| 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Linear Regression | 0.21 | 23.61 | 0.45 | 4.86 | 0.99 | 0.93 | 0.33 | 3.35 | 3.30 | 5.46 |
| 0 | KNN Regression | 0.88 | 21.92 | 0.94 | 4.68 | 0.98 | 0.93 | 0.69 | 3.33 | 5.81 | 5.08 |
| 0 | SVR-Multioutput | 0.22 | 24.60 | 0.47 | 4.96 | 0.99 | 0.92 | 0.33 | 3.26 | 3.61 | 5.50 |
| 0 | SVR-RegressorChain | 0.22 | 24.64 | 0.47 | 4.96 | 0.99 | 0.92 | 0.33 | 3.27 | 3.58 | 5.49 |
| 0 | Decison Tree Regressor | 0.33 | 1.59 | 0.57 | 1.26 | 0.99 | 1.00 | 0.34 | 0.66 | 3.14 | 1.21 |
| 0 | Gradient Boosted Trees | 0.17 | 2.93 | 0.41 | 1.71 | 1.00 | 0.99 | 0.30 | 1.11 | 3.00 | 1.96 |
| 0 | Gradient Boosted Trees - LGB | 0.08 | 0.81 | 0.28 | 0.90 | 1.00 | 1.00 | 0.19 | 0.60 | 2.73 | 1.03 |
#Temperature Prediction
plt.plot(range(0,100), testl_array[0:100,0], label="Actual_Temp", color='r')
plt.scatter(range(0,100), LGBpred[0:100,0], label="Predicted_Temp", color='b')
plt.xlabel('Index')
plt.ylabel('Temperature in Degrees Celcius')
plt.legend()
plt.show()
#RH Prediction
plt.plot(range(0,100), testl_array[0:100,1], label="Actual_Humidity", color='g')
plt.scatter(range(0,100), LGBpred[0:100,1], label="Predicted_Humidity", color='y')
plt.xlabel('Index')
plt.ylabel('Percentage Humidity')
plt.legend()
plt.show()
#Temperature Scatter Plot for predictions
plt.scatter(testl_array[:,0], LGBpred[:,0])
plt.ylabel("Predicted Temperatures")
plt.xlabel("Test Temperatures")
plt.title(" Predicted Vs. Test Temperature")
plt.show()
#RH Scatter Plot for predictions
plt.scatter(testl_array[:,1], LGBpred[:,1])
plt.ylabel("Predicted RH")
plt.xlabel("Test RH")
plt.title(" Predicted Vs. Test RH")
plt.show()
#model = RandomForestRegressor(n_estimators=100, bootstrap='TRUE', max_depth=20)
model_RF = RandomForestRegressor(n_jobs=-1, random_state=35)
model_RF.fit(trainf_array,trainl_array)
#parameters used by the current random forest model initiated.
from pprint import pprint
print('Parameters currently in use:\n')
pprint(model.get_params())
Parameters currently in use:
{'bootstrap': True,
'ccp_alpha': 0.0,
'criterion': 'mse',
'max_depth': None,
'max_features': 'auto',
'max_leaf_nodes': None,
'max_samples': None,
'min_impurity_decrease': 0.0,
'min_impurity_split': None,
'min_samples_leaf': 1,
'min_samples_split': 2,
'min_weight_fraction_leaf': 0.0,
'n_estimators': 100,
'n_jobs': None,
'oob_score': False,
'random_state': 42,
'verbose': 0,
'warm_start': False}
RFpred = model_RF.predict(testf_array)
#Model Evaluation
mse_temp = mean_squared_error(testl_array[:,0], RFpred[:,0])
print("Mean Squared Error for Temperature is : ", mse_temp)
mse_humidity = mean_squared_error(testl_array[:,1], RFpred[:,1])
print("Mean Squared Error for Relative Humidity is : ", mse_humidity)
rmse_temp = math.sqrt(mean_squared_error(testl_array[:,0], RFpred[:,0]))
print("Root Mean Squared Error for Temperature is : ", rmse_temp)
rmse_humidity = math.sqrt(mean_squared_error(testl_array[:,1], RFpred[:,1]))
print("Root Mean Squared Error for Relative Humidity is : ", rmse_humidity)
exp_var_score_temp = explained_variance_score(testl_array[:,0], RFpred[:,0])
print("The Explained variance score Temperature is {}".format(exp_var_score_temp))
exp_var_score_humidity = explained_variance_score(testl_array[:,1], RFpred[:,1])
print("The Explained variance score in terms of Relative Humidity is {}".format(exp_var_score_humidity))
mae_temp = mean_absolute_error(testl_array[:,0], RFpred[:,0])
print("Explained Variance Score for Temperature is ", mae_temp)
mae_hum = mean_absolute_error(testl_array[:,1], RFpred[:,1])
print("Explained Variance Score for Relative Humidity is ", mae_hum)
mean_abs_perc_error_temp = mean_absolute_percentage_error(testl_array[:,0], RFpred[:,0])
print("The mean absolute percentage error for Temperature is" ,mean_abs_perc_error_temp)
mean_abs_perc_error_humidity = mean_absolute_percentage_error(testl_array[:,1], RFpred[:,1])
print("The mean absolute percentage error for Relative Humidity is" ,mean_abs_perc_error_humidity)
Mean Squared Error for Temperature is : 0.106908996407001 Mean Squared Error for Relative Humidity is : 0.4812867436422099 Root Mean Squared Error for Temperature is : 0.3269694120357453 Root Mean Squared Error for Relative Humidity is : 0.6937483287491293 The Explained variance score Temperature is 0.9973046016152061 The Explained variance score in terms of Relative Humidity is 0.9984904946613009 Explained Variance Score for Temperature is 0.1992759622673287 Explained Variance Score for Relative Humidity is 0.3816113709441685 The mean absolute percentage error for Temperature is 2.0283611717550016 The mean absolute percentage error for Relative Humidity is 0.6935920178001578
ModelMetrics= ModelMetrics.append([['Random Forest Regression',mse_temp,mse_humidity,rmse_temp,rmse_humidity,exp_var_score_temp,exp_var_score_humidity,mae_temp,mae_hum,mean_abs_perc_error_temp,mean_abs_perc_error_humidity]])
ModelMetrics
| 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Linear Regression | 0.21 | 23.61 | 0.45 | 4.86 | 0.99 | 0.93 | 0.33 | 3.35 | 3.30 | 5.46 |
| 0 | KNN Regression | 0.88 | 21.92 | 0.94 | 4.68 | 0.98 | 0.93 | 0.69 | 3.33 | 5.81 | 5.08 |
| 0 | SVR-Multioutput | 0.22 | 24.60 | 0.47 | 4.96 | 0.99 | 0.92 | 0.33 | 3.26 | 3.61 | 5.50 |
| 0 | SVR-RegressorChain | 0.22 | 24.64 | 0.47 | 4.96 | 0.99 | 0.92 | 0.33 | 3.27 | 3.58 | 5.49 |
| 0 | Decison Tree Regressor | 0.33 | 1.59 | 0.57 | 1.26 | 0.99 | 1.00 | 0.34 | 0.66 | 3.14 | 1.21 |
| 0 | Gradient Boosted Trees | 0.17 | 2.93 | 0.41 | 1.71 | 1.00 | 0.99 | 0.30 | 1.11 | 3.00 | 1.96 |
| 0 | Gradient Boosted Trees - LGB | 0.08 | 0.81 | 0.28 | 0.90 | 1.00 | 1.00 | 0.19 | 0.60 | 2.73 | 1.03 |
| 0 | Random Forest Regression | 0.11 | 0.48 | 0.33 | 0.69 | 1.00 | 1.00 | 0.20 | 0.38 | 2.03 | 0.69 |
#Temperature Prediction
plt.plot(range(0,100), testl_array[0:100,0], label="Actual_Temp", color='r')
plt.scatter(range(0,100), RFpred[0:100,0], label="Predicted_Temp", color='b')
plt.xlabel('Index')
plt.ylabel('Temperature in Degrees Celcius')
plt.legend()
plt.show()
#RH Prediction
plt.plot(range(0,100), testl_array[0:100,1], label="Actual_Humidity", color='g')
plt.scatter(range(0,100), RFpred[0:100,1], label="Predicted_Humidity", color='y')
plt.xlabel('Index')
plt.ylabel('Percentage Humidity')
plt.legend()
plt.show()
#Temperature Scatter Plot for predictions
plt.scatter(testl_array[:,0], RFpred[:,0])
plt.ylabel("Predicted Temperatures")
plt.xlabel("Test Temperatures")
plt.title(" Predicted Vs. Test Temperature")
plt.show()
#RH Scatter Plot for predictions
plt.scatter(testl_array[:,1], RFpred[:,1])
plt.ylabel("Predicted RH")
plt.xlabel("Test RH")
plt.title(" Predicted Vs. Test RH")
plt.show()
Final_Predictions = pd.DataFrame({'Test_Temp':testl_array[:,0],'Test_RH':testl_array[:,1],'Linear_Reg_temp':LRpred[:,0],'Linear_Reg_RH':LRpred[:,1], 'KNN_Reg_temp':KNNpred[:,0], 'KNN_Reg_RH':KNNpred[:,1], 'SVR_temp':SVRpred[:,0], 'SVR_RH':SVRpred[:,1], 'RCSVR_temp':RCSVRpred[:,0], 'RCSVR_RH':RCSVRpred[:,1], 'DecisionTree_temp':DTpred[:,0], 'DecisionTree_RH': DTpred[:,1], 'GradentBoostedTree_temp':GBRpred[:,0], 'GradentBoostedTree_RH':GBRpred[:,1], 'LGB_temp':LGBpred[:,0], 'LGB_RH':LGBpred[:,1], 'RandomForest_temp':RFpred[:,0], 'RandomForest_RH':RFpred[:,1]})
Final_Predictions.head(25)
| Test_Temp | Test_RH | Linear_Reg_temp | Linear_Reg_RH | KNN_Reg_temp | KNN_Reg_RH | SVR_temp | SVR_RH | RCSVR_temp | RCSVR_RH | DecisionTree_temp | DecisionTree_RH | GradentBoostedTree_temp | GradentBoostedTree_RH | LGB_temp | LGB_RH | RandomForest_temp | RandomForest_RH | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 21.74 | 73.0 | 21.65 | 74.67 | 23.55 | 65.6 | 21.57 | 75.45 | 21.61 | 75.25 | 23.32 | 74.0 | 21.73 | 75.37 | 22.22 | 73.32 | 21.79 | 73.19 |
| 1 | 22.88 | 67.0 | 22.61 | 66.78 | 23.24 | 66.8 | 22.79 | 67.22 | 22.78 | 66.80 | 22.99 | 68.0 | 22.83 | 67.56 | 23.04 | 67.52 | 22.90 | 67.04 |
| 2 | 29.25 | 56.0 | 28.98 | 53.59 | 29.05 | 61.6 | 28.91 | 53.65 | 28.92 | 53.63 | 29.71 | 57.0 | 29.31 | 58.33 | 29.13 | 57.08 | 29.62 | 56.92 |
| 3 | 30.29 | 60.0 | 30.60 | 59.82 | 29.80 | 59.6 | 30.69 | 59.35 | 30.66 | 59.05 | 29.49 | 58.0 | 30.25 | 58.97 | 30.27 | 59.21 | 29.91 | 59.15 |
| 4 | 17.63 | 57.0 | 16.98 | 55.38 | 15.80 | 45.0 | 17.10 | 58.19 | 17.12 | 57.76 | 17.37 | 56.0 | 17.42 | 51.31 | 17.18 | 54.83 | 17.60 | 56.58 |
| 5 | 24.77 | 65.0 | 24.34 | 68.56 | 23.64 | 66.2 | 24.46 | 68.39 | 24.49 | 67.99 | 24.86 | 66.0 | 25.08 | 60.40 | 24.57 | 64.16 | 24.95 | 65.51 |
| 6 | 15.00 | 72.0 | 15.02 | 76.74 | 17.05 | 75.6 | 15.04 | 77.08 | 14.99 | 77.08 | 14.73 | 72.0 | 15.66 | 74.97 | 15.24 | 72.39 | 15.32 | 72.43 |
| 7 | 12.94 | 81.0 | 13.03 | 78.72 | 13.67 | 76.8 | 13.04 | 77.67 | 12.99 | 77.67 | 13.72 | 82.0 | 13.17 | 82.47 | 13.13 | 80.98 | 12.96 | 80.96 |
| 8 | 13.22 | 72.0 | 12.97 | 74.25 | 14.06 | 65.0 | 13.10 | 74.42 | 13.07 | 74.81 | 13.03 | 72.0 | 12.57 | 69.97 | 12.75 | 71.30 | 13.03 | 71.60 |
| 9 | 24.06 | 77.0 | 23.89 | 78.07 | 24.38 | 75.6 | 23.92 | 77.71 | 23.92 | 77.36 | 23.19 | 76.0 | 23.80 | 77.20 | 24.01 | 77.33 | 24.05 | 77.02 |
| 10 | 27.43 | 69.0 | 27.43 | 69.93 | 27.20 | 69.8 | 27.41 | 69.73 | 27.43 | 69.58 | 27.42 | 69.0 | 27.37 | 69.64 | 27.51 | 69.70 | 27.47 | 69.10 |
| 11 | 28.65 | 61.0 | 28.06 | 63.00 | 28.73 | 61.0 | 28.22 | 62.13 | 28.16 | 61.88 | 28.70 | 62.0 | 28.30 | 60.16 | 28.61 | 61.78 | 28.56 | 60.83 |
| 12 | 24.61 | 81.0 | 24.55 | 80.65 | 25.00 | 82.8 | 24.61 | 80.71 | 24.58 | 80.39 | 24.72 | 81.0 | 24.74 | 81.62 | 24.70 | 81.70 | 24.81 | 81.25 |
| 13 | 28.01 | 48.0 | 27.72 | 49.13 | 27.88 | 50.0 | 27.79 | 48.64 | 27.73 | 48.83 | 27.32 | 46.0 | 28.01 | 46.85 | 27.79 | 48.16 | 27.87 | 47.84 |
| 14 | 25.73 | 93.0 | 26.02 | 91.66 | 26.10 | 89.0 | 25.97 | 91.29 | 25.90 | 91.38 | 25.73 | 93.0 | 25.45 | 92.67 | 25.62 | 92.72 | 25.47 | 92.87 |
| 15 | 29.85 | 60.0 | 29.93 | 57.58 | 29.56 | 60.8 | 29.91 | 57.13 | 29.93 | 57.34 | 29.83 | 60.0 | 29.93 | 59.01 | 29.67 | 60.14 | 29.75 | 59.71 |
| 16 | 26.79 | 45.0 | 26.47 | 44.69 | 26.00 | 47.6 | 26.50 | 43.30 | 26.49 | 43.67 | 27.69 | 47.0 | 26.50 | 48.04 | 26.68 | 45.65 | 27.01 | 45.65 |
| 17 | 21.76 | 70.0 | 21.74 | 69.33 | 22.73 | 70.0 | 21.81 | 70.17 | 21.77 | 70.02 | 22.41 | 72.0 | 21.69 | 70.31 | 21.51 | 70.10 | 21.52 | 69.53 |
| 18 | 21.81 | 92.0 | 22.65 | 92.85 | 22.50 | 87.8 | 22.65 | 91.36 | 22.64 | 91.21 | 21.86 | 92.0 | 22.06 | 91.61 | 22.17 | 91.82 | 22.03 | 92.11 |
| 19 | 20.09 | 52.0 | 19.82 | 57.04 | 19.11 | 54.8 | 19.94 | 58.52 | 19.92 | 58.19 | 20.34 | 50.0 | 19.89 | 50.75 | 20.12 | 52.57 | 20.26 | 52.14 |
| 20 | 34.99 | 45.0 | 34.94 | 37.20 | 33.82 | 45.8 | 34.83 | 36.10 | 34.82 | 36.45 | 34.80 | 47.0 | 34.73 | 44.62 | 35.01 | 45.86 | 34.64 | 44.32 |
| 21 | 28.44 | 66.0 | 28.45 | 66.98 | 28.88 | 63.0 | 28.48 | 66.92 | 28.48 | 66.72 | 28.44 | 66.0 | 28.25 | 66.48 | 28.32 | 65.90 | 28.33 | 65.81 |
| 22 | 23.62 | 69.0 | 23.42 | 71.52 | 22.07 | 80.6 | 23.47 | 71.25 | 23.45 | 71.07 | 24.55 | 70.0 | 24.13 | 65.85 | 23.58 | 69.45 | 23.71 | 69.12 |
| 23 | 8.88 | 98.0 | 8.58 | 93.37 | 9.67 | 95.6 | 8.75 | 93.09 | 8.71 | 93.27 | 8.85 | 98.0 | 8.51 | 96.41 | 9.05 | 97.17 | 9.05 | 98.19 |
| 24 | 21.29 | 78.0 | 21.17 | 81.89 | 21.40 | 81.0 | 21.24 | 80.80 | 21.25 | 80.62 | 21.32 | 78.0 | 21.13 | 79.14 | 21.36 | 77.70 | 21.43 | 78.11 |
#Assign suitable columns to the dataframe that contains the metrics for the applied algorithms
ModelMetrics.columns= ['ModelName','MSE-Temp','MSE-RH','RMSE-Temp','RMSE-RH','R2Score-Temp','R2Score-RH','MAE-Temp','MAE-RH','MAPE-Temp','MAPE-RH',]
ModelMetrics
| ModelName | MSE-Temp | MSE-RH | RMSE-Temp | RMSE-RH | R2Score-Temp | R2Score-RH | MAE-Temp | MAE-RH | MAPE-Temp | MAPE-RH | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Linear Regression | 0.21 | 23.61 | 0.45 | 4.86 | 0.99 | 0.93 | 0.33 | 3.35 | 3.30 | 5.46 |
| 0 | KNN Regression | 0.88 | 21.92 | 0.94 | 4.68 | 0.98 | 0.93 | 0.69 | 3.33 | 5.81 | 5.08 |
| 0 | SVR-Multioutput | 0.22 | 24.60 | 0.47 | 4.96 | 0.99 | 0.92 | 0.33 | 3.26 | 3.61 | 5.50 |
| 0 | SVR-RegressorChain | 0.22 | 24.64 | 0.47 | 4.96 | 0.99 | 0.92 | 0.33 | 3.27 | 3.58 | 5.49 |
| 0 | Decison Tree Regressor | 0.33 | 1.59 | 0.57 | 1.26 | 0.99 | 1.00 | 0.34 | 0.66 | 3.14 | 1.21 |
| 0 | Gradient Boosted Trees | 0.17 | 2.93 | 0.41 | 1.71 | 1.00 | 0.99 | 0.30 | 1.11 | 3.00 | 1.96 |
| 0 | Gradient Boosted Trees - LGB | 0.08 | 0.81 | 0.28 | 0.90 | 1.00 | 1.00 | 0.19 | 0.60 | 2.73 | 1.03 |
| 0 | Random Forest Regression | 0.11 | 0.48 | 0.33 | 0.69 | 1.00 | 1.00 | 0.20 | 0.38 | 2.03 | 0.69 |
plt.barh(ModelMetrics['ModelName'], ModelMetrics['MAE-RH'], color ='green')
plt.xlabel('RH Prediction Error (in % RH)')
plt.title('RH Prediction MAE for Different Models')
plt.show()
plt.barh(ModelMetrics['ModelName'], ModelMetrics['MAE-Temp'], color ='red')
plt.xlabel('Temperature Prediction Error (in Degrees Celcius)')
plt.title('Temperature Prediction MAE for Different Models')
plt.show()
plt.barh(ModelMetrics['ModelName'], ModelMetrics['MAPE-Temp'], color ='purple')
plt.xlabel('MAPE in Temperature Prediction')
plt.title('Temperature Prediction MAPE for Different Models')
plt.show()
plt.barh(ModelMetrics['ModelName'], ModelMetrics['MAPE-RH'], color ='brown')
plt.xlabel('MAPE in RH Prediction')
plt.title('RH Prediction MAPE for Different Models')
plt.show()